/**************************************************************
This selects two samples - graduates in CTE fields from CCs,
and any student in a two-year school who shows up in a "program"
with at least 10 cumulative credit hours.
**************************************************************/

%macro sample_select ; 
/* sample_select: wrapper macro with no parameters. It defines the helper
   macros cte_flag and deglevl, builds the graduate and non-graduate
   samples, and writes the combined file OUTPUTS.all_students_cdhe. */

/* cte_flag: expands to a boolean expression (no trailing semicolon) that is
   true when degcip_2dig is one of the 2-digit program codes listed below,
   which this program treats as CTE fields. Meant for the right-hand side
   of an assignment inside a DATA step. */
%macro cte_flag ;
    (degcip_2dig in ('10', '11', '12', '13', '15', '19', '43',
                     '44', '46', '47', '48', '49', '51', '52'))
%mend cte_flag ;
    
/* deglevl: DATA step statements that recode DegreeLevel (and, for one rule,
   programcode) into the two-character deglevl_code. The first matching
   rule wins. A DegreeLevel that matches no rule leaves deglevl_code as it
   was. */
%macro deglevl ;
    select ;
        when (DegreeLevel in ('05', '06'))        deglevl_code = '08' ;
        when (DegreeLevel = '01')                 deglevl_code = '01' ;
        when (DegreeLevel = '02')                 deglevl_code = '02' ;
        when (DegreeLevel = '03')                 deglevl_code = '04' ;
        when (DegreeLevel = '04')                 deglevl_code = '06' ;
        when (DegreeLevel in ('11', '12', '13'))  deglevl_code = '03' ;
        when (DegreeLevel = '21')                 deglevl_code = '05' ;
        when (DegreeLevel in ('31', '33'))        deglevl_code = '07' ;
        /* program 130401 at level 51 is coded 18; this rule has to be
           tested before the generic 51/52 rule that follows */
        when (DegreeLevel in ('41', '53')
              or (programcode = '130401' and DegreeLevel = '51'))
                                                  deglevl_code = '18' ;
        when (DegreeLevel in ('51', '52'))        deglevl_code = '17' ;
        /* an empty OTHERWISE is required: SELECT raises an error when no
           WHEN matches and OTHERWISE is absent */
        otherwise ;
    end ;
%mend deglevl ;
/***********
step 1: find graduates in CTE fields with sub-bacc degrees

Defined as a DATA step view, so the degree file is only read when a later
step reads GRADUATES. Every qualifying degree record is kept here together
with flag_cte; this step does not itself filter on the flag.
*************/

data graduates (keep = pik degcip_2dig institutioncode year quarter
                       qtime_grad deglevl_code state flag_cte)
     / view=graduates ;

    set INPUTS.coedu_degree_1997_2016_qpik ;

    /* recode DegreeLevel into deglevl_code */
    %deglevl ;

    /* translate the reporting term into a quarter; terms other than
       1 through 5 leave quarter missing */
    year = reportyear ;
    if reportterm = 2 then quarter = 4 ;
    else if reportterm = 1 then quarter = 3 ;
    else if reportterm in (3, 5) then quarter = 1 ;
    else if reportterm = 4 then quarter = 2 ;

    /* constant state code for every record in this file */
    state = '08' ;

    qtime_grad = %qtime(year,quarter,location=datastep) ;

    /* first two characters of the program code */
    degcip_2dig = substr(programcode, 1, 2) ;

    /* 1 when the 2-digit code is in the CTE list, 0 otherwise */
    flag_cte = %cte_flag ;

    /* keep only deglevl_code 01-04, and only degrees reported
       in 2001 or later */
    if deglevl_code in ('01', '02', '03', '04') and year >= 2001 ;
run ;
    
/* Enrollment records from both enrollment files (1997-2007 and 2008-2016),
   restricted to rows with at least 10 cumulative credit hours, report year
   2001 or later, and deglevl_code 01-04. One output row per qualifying
   enrollment record. */
data nongraduates (keep = pik degcip_2dig institutioncode credithours
                          reportterm reportyear year quarter state
                          cte_flag deglevl_code) ;
    set INPUTS.coedu_enrollment_1997_2007_qpik
        INPUTS.coedu_enrollment_2008_2016_qpik ;

    state = '08' ;

    /* recode DegreeLevel into deglevl_code */
    %deglevl ;

    /* first two characters of the program code */
    degcip_2dig = substr(programcode, 1, 2) ;

    /* cumulativecredithours is read through the 5. informat */
    credithours = input(cumulativecredithours, 5.) ;

    if credithours >= 10 ;

    /* translate the reporting term into a quarter; terms other than
       1 through 5 leave quarter missing */
    year = reportyear ;
    if reportterm = 2 then quarter = 4 ;
    else if reportterm = 1 then quarter = 3 ;
    else if reportterm in (3, 5) then quarter = 1 ;
    else if reportterm = 4 then quarter = 2 ;

    /* 1 when the 2-digit code is in the CTE list, 0 otherwise */
    cte_flag = %cte_flag ;

    /* NOTE(review): qtime is computed but is not in the KEEP list, so it
       is not written to NONGRADUATES - confirm whether it should be kept */
    qtime = %qtime(year,quarter,location=datastep) ;

    /* restricting to lower level degrees, 2001 or later */
    if year >= 2001 and deglevl_code in ('01', '02', '03', '04') ;
run ;

/* diagnostic cross-tab of credit hours by degree level code */
proc freq data=nongraduates ;
    tables credithours * deglevl_code ;
run ;
   
/* Order the enrollment rows so they can be merged with the per-group
   CTE summary built below. */
proc sort data=nongraduates out=nongrads ;
    by pik institutioncode deglevl_code ;
run ;

/* One row per pik / institutioncode / deglevl_code holding the maximum
   cte_flag in that group, i.e. 1 if any row in the group was flagged. */
proc means data=nongrads nway noprint ;
    class pik institutioncode deglevl_code ;
    var cte_flag ;
    output out=cte_flag (drop=_type_ _freq_) max=cte_flag ;
run ;

/* Replace the row-level cte_flag with the group maximum, then keep a single
   row per pik / institutioncode: the last row in BY order, and only when
   its group flag is 1.
   NOTE(review): the sort key has no year or quarter, so "last" means the
   last input row of the highest deglevl_code, not necessarily the latest
   enrollment term - confirm this matches the intent of "finalenroll". */
data nongraduates_finalenroll ;
    merge nongrads (drop=cte_flag)
          cte_flag ;
    by pik institutioncode deglevl_code ;

    if last.institutioncode and cte_flag = 1 ;
run ;
    
/* Materialise the GRADUATES view into GRADS, sorted for the later merge.
   NODUPKEY keeps one row per pik / institutioncode / year / quarter.
   NOTE(review): when a person has several degree rows in the same quarter,
   which row survives (and therefore which flag_cte and degcip_2dig) depends
   on input order - confirm that is acceptable. */
proc sort data=graduates out=grads nodupkey ;
    by pik institutioncode year quarter ;
run ;

/* distribution of the CTE flag among graduates */
proc freq data=grads ;
    tables flag_cte ;
    title 'CTE FLAG GRADS' ;
run ;

/* TITLE is a global statement and stays in effect until it is changed or
   cancelled; clear it so later output is not labelled 'CTE FLAG GRADS' */
title ;

/*********************************************************
    This creates a dataset with all our possible
    observations - graduates and non-graduates, who will 
    be the control group

    GRADS carries its CTE indicator as flag_cte, while
    NONGRADUATES_FINALENROLL carries cte_flag (always 1 in
    that file). When both files have a row for the same
    pik / institutioncode / year / quarter, the values of
    shared variables come from NONGRADUATES_FINALENROLL,
    because it is listed last in the MERGE.

    NOTE(review): a person who graduates in one quarter and
    has a final-enrollment row in another quarter appears
    both as a graduate and as a non-graduate - confirm
    this is intended.
**********************************************************/
    
data OUTPUTS.all_students_cdhe ;
    merge grads (in=a)
          nongraduates_finalenroll ;
    by pik institutioncode year quarter ;

    /* 1 when the row has a degree record, 0 otherwise */
    graduate = a ;

    /* KEEP TWO TYPES OF PEOPLE:
       CTE GRADUATES AND THEN NON GRADUATES.
       The graduate test must use flag_cte: cte_flag only exists on
       non-graduate rows, so testing it here would drop every graduate
       that has no matching enrollment row. */
    if (graduate and flag_cte) or not graduate ;
run ;
    
/* Sort the combined file in place by person, institution and term. */
proc sort data=OUTPUTS.all_students_cdhe ;
    by pik institutioncode year quarter ;
run ;

/* Diagnostics: graduate counts overall and by 2-digit program code. */
proc freq data=OUTPUTS.all_students_cdhe ;
    tables graduate
           degcip_2dig * graduate ;
run ;
    
/*****************************************
    Our next step: extract all the terms 
    in which a person is enrolled (to flag those quarters)
******************************************/
    
 
    

    
%mend sample_select ; 

/* run the sample selection once, when this file is executed */
%sample_select; 
